###############################
# Analysis of conjoint data 
###############################

rm(list=ls())

# load packages 
library(foreign)
library(lmtest)
library(sandwich)
library(stargazer)
library(msm)
library(Hmisc)

# load function that does clustered SEs
vcovCluster <- function(
  model,
  cluster
)
{
  require(sandwich)
  require(lmtest)
  if(nrow(model.matrix(model))!=length(cluster)){
    stop("check your data: cluster variable has different N than model")
  }
  M <- length(unique(cluster))
  N <- length(cluster)           
  K <- model$rank   
  dfc <- (M/(M - 1)) * ((N - 1)/(N - K))
  uj  <- apply(estfun(model), 2, function(x) tapply(x, cluster, sum));
  rcse.cov <- dfc * sandwich(model, meat = crossprod(uj)/N)
  return(rcse.cov)
}

##############################
# Prepare data
##############################

# load data
d = read.dta("~/Dropbox/Cerrillos 2017/08_replication/conjoint_cerrillos_LAPS.dta")
table(d$failcon)
names(d)
dim(d)

# check data
d$idnum = as.numeric(d$idnum)
d$failcon[is.na(d$failcon)]=0
table(d$failcon, exclude = NULL) 
prop.table(table(d$failcon, exclude = NULL))
table(d$enumerator)
table(d$selected, exclude = NULL)

# drop failcon and non-responses
d = subset(d, d$selected!=99)
d = subset(d, d$selected!=88)
table(d$selected, exclude = NULL)
table(d$failcon, exclude = NULL)

# Vote past election
table(d$a11)
d$votepastelection = 0
d$votepastelection[d$a11==1]=1
table(d$votepastelection)
round(prop.table(table(d$votepastelection)),2)

# Vote next election
d$a10 = as.numeric(d$a10)
table(d$a10, exclude = NULL)
d$votenextelection = 0
d$votenextelection[d$a10<7]=1
table(d$votenextelection)
round(prop.table(table(d$votenextelection)),2)

d$novotenextelection = 1 - d$votenextelection
table(d$novotenextelection)
round(prop.table(table(d$novotenextelection)),2)

# Name last election
table(d$a12)
d$namepastelection = 0
d$namepastelection[d$a12==1]=1
d$namepastelection[d$a12==2]=1
d$namepastelection[d$a12==3]=1
d$namepastelection[d$a12==4]=1
d$namepastelection[d$a12==5]=1
table(d$namepastelection)
prop.table(table(d$namepastelection))

# Interest in politics
table(d$a3, exclude = NULL)
d$interest = 0
d$interest[d$a3<4]=1
table(d$interest)
prop.table(table(d$interest))

# Likely voter 1
d$likely1 = 0
d$likely1[d$interest==1 & d$votepastelection==1 & d$votenextelection==1]=1
prop.table(table(d$likely1))
d$nolikely1 = 1 - d$likely1

# Likely voter 2
d$likely2 = 0
d$likely2[d$votepastelection==1 & d$votenextelection==1]=1
prop.table(table(d$likely2))
d$nolikely2 = 1 - d$likely2

# Likely voter 3
d$likely3 = 0
d$likely3[d$votepastelection==1 & d$interest==1]=1
prop.table(table(d$likely3))
d$nolikely3 = 1 - d$likely3

# ideology and likely
table(d$a5,d$likely1)

# same age
d$idnum_pair = as.numeric(paste(d$idnum,d$pair,sep=""))
length(unique(d$idnum_pair))

d$atage2 = as.numeric(d$atage)

d$sd_age = NA
for (i in d$idnum_pair) {
  
  d$sd_age[d$idnum_pair==i] = sd(d$atage2[d$idnum_pair==i])
  
}
table(d$sd_age)

d = d[d$sd_age==0,]

# 30 years
table(d$atage)
d$years30 = 0
d$years30[d$atage==30]=1
table(d$years30)

d$idnum_pair = paste(d$idnum,d$pair,sep="_")

d$idnum_pair_30 = 0
d$idnum_pair_30[d$years30==1]=1
table(d$idnum_pair_30)

code = d$idnum_pair[d$idnum_pair_30==1]

d$pair_30 = 0

for (i in code) {
  
  d$pair_30[d$idnum_pair==i] = 1
  
}

table(d$pair_30)

# remove 30
d = d[d$pair_30==0,]

# prepare samples
table(d$a5, exclude = NULL)

d_left = d[d$a5<5,]
table(d_left$a5)

d_right = d[d$a5>6 & d$a5<11,]
table(d_right$a5)

d_center = d[d$a5>4 & d$a5<7,]
table(d_center$a5)

d_noideo = d[d$a5==88 | d$a5==99,]
table(d_noideo$a5)

###################
# Main results
###################

# All
describe(d$idnum)
all <- lm(d$outcome ~ d$atideology + d$atprofession + d$atage)
all_c <- round(coeftest(all, vcov = vcovCluster(all, cluster = d$idnum)),4)
all_c

# Left
describe(d_left$idnum)
left <- lm(d_left$outcome ~ d_left$atideology + d_left$atprofession)
summary(left)
left_c <- round(coeftest(left, vcov = vcovCluster(left, cluster = d_left$idnum)),4)
left_c

# Right
describe(d_right$idnum)
right <- lm(d_right$outcome ~ d_right$atideology + d_right$atprofession)
summary(right)
right_c <- round(coeftest(right, vcov = vcovCluster(right, cluster = d_right$idnum)),4)
right_c

# Center
describe(d_center$idnum)
center <- lm(d_center$outcome ~ d_center$atideology + d_center$atprofession)
summary(center)
center_c <- round(coeftest(center, vcov = vcovCluster(center, cluster = d_center$idnum)),4)
center_c

# No ideology
describe(d_noideo$idnum)
noideo <- lm(d_noideo$outcome ~ d_noideo$atideology + d_noideo$atprofession)
summary(noideo)
noideo_c <- round(coeftest(noideo, vcov = vcovCluster(noideo, cluster = d_noideo$idnum)),4)
noideo_c

#####################
# Prepare plots
#####################

# left 
left_c # regression results
coeff = left_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = left_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/028_left_no30.txt", sep="\t")

# right
right_c # regression results
coeff = right_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = right_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/029_right_no30.txt", sep="\t")

# center
center_c # regression results
coeff = center_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = center_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/030_center_no30.txt", sep="\t")

# no ideology
noideo_c # regression results
coeff = noideo_c[,1] # save coefficientes
coeff2 = cbind(coeff) # matrix of coefficientes
c_se = noideo_c[,2] # save cluster standard errors
c_se2 = cbind(c_se) # matrix of cluster standard errors
results = data.frame(coeff2,c_se2) # generate dataset 
colnames(results) = c("y1","r1")  # change column names
results = results[-1,] # drop intercept 
baseline =  c(0,0) # gen baseline rows
results <- rbind(baseline,results[1, ], baseline,results[2:3, ]) # add baselines
rownames(results) = c("1b.atideology","2.atideology",
                      "1b.atprofession","2.atprofession","3.atprofession")
results 
write.table(results, "~/Dropbox/Cerrillos 2017/08_replication/031_noideo_no30.txt", sep="\t")
